// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/common/sandbox_linux/sandbox_linux.h"

#include <dirent.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/command_line.h"
#include "base/debug/stack_trace.h"
#include "base/files/scoped_file.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/memory/singleton.h"
#include "base/posix/eintr_wrapper.h"
#include "base/strings/string_number_conversions.h"
#include "base/sys_info.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "content/common/sandbox_linux/sandbox_seccomp_bpf_linux.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/sandbox_linux.h"
#include "sandbox/linux/services/credentials.h"
#include "sandbox/linux/services/namespace_sandbox.h"
#include "sandbox/linux/services/proc_util.h"
#include "sandbox/linux/services/resource_limits.h"
#include "sandbox/linux/services/thread_helpers.h"
#include "sandbox/linux/services/yama.h"
#include "sandbox/linux/suid/client/setuid_sandbox_client.h"

#if defined(ANY_OF_AMTLU_SANITIZER)
#include <sanitizer/common_interface_defs.h>
#endif

using sandbox::Yama;

namespace {

// Functor that closes the file descriptor |fd| points to and then overwrites
// the pointed-to value with -1.
struct FDCloser {
    inline void operator()(int* fd) const
    {
        // |fd| itself must not be null; this is only verified in DCHECK builds.
        DCHECK(fd);
        // A close() result other than 0 trips the PCHECK.
        PCHECK(0 == IGNORE_EINTR(close(*fd)));
        // Leave an invalid value behind so the caller's copy is not reused.
        *fd = -1;
    }
};

void LogSandboxStarted(const std::string& sandbox_name)
{
    const base::CommandLine& command_line = *base::CommandLine::ForCurrentProcess();
    const std::string process_type = command_line.GetSwitchValueASCII(switches::kProcessType);
    const std::string activated_sandbox = "Activated " + sandbox_name + " sandbox for process type: " + process_type + ".";
    VLOG(1) << activated_sandbox;
}

// Reports whether this binary was compiled with THREAD_SANITIZER defined.
bool IsRunningTSAN()
{
#if defined(THREAD_SANITIZER)
    const bool built_with_tsan = true;
#else
    const bool built_with_tsan = false;
#endif
    return built_with_tsan;
}

// Returns a freshly opened descriptor for /proc. When |proc_fd| is
// non-negative it is re-opened relative to itself with openat(); otherwise
// "/proc/" is opened through the file system.
// TODO(jln): get rid of this ugly interface.
base::ScopedFD OpenProc(int proc_fd)
{
    const bool have_proc_handle = proc_fd >= 0;

    // Going through an existing handle to /proc allows to bypass file system
    // restrictions; without one, make an attempt at the path directly.
    const int base_dir_fd = have_proc_handle ? proc_fd : AT_FDCWD;
    const char* const path = have_proc_handle ? "." : "/proc/";

    const int ret_proc_fd = HANDLE_EINTR(
        openat(base_dir_fd, path, O_RDONLY | O_DIRECTORY | O_CLOEXEC));
    DCHECK_LE(0, ret_proc_fd);
    return base::ScopedFD(ret_proc_fd);
}

} // namespace

namespace content {

// Puts the object in its "nothing initialized yet" state: no /proc handle,
// every capability flag cleared, status flags marked invalid. Creating the
// setuid sandbox client is mandatory; failure to do so is fatal.
LinuxSandbox::LinuxSandbox()
    : proc_fd_(-1)
    , seccomp_bpf_started_(false)
    , sandbox_status_flags_(kSandboxLinuxInvalid)
    , pre_initialized_(false)
    , seccomp_bpf_supported_(false)
    , seccomp_bpf_with_tsync_supported_(false)
    , yama_is_enforcing_(false)
    , initialize_sandbox_ran_(false)
    , setuid_sandbox_client_(sandbox::SetuidSandboxClient::Create())
{
    if (!setuid_sandbox_client_) {
        LOG(FATAL) << "Failed to instantiate the setuid sandbox client.";
    }
#if defined(ANY_OF_AMTLU_SANITIZER)
    // Allocate the sanitizer argument block and zero it out.
    sanitizer_args_.reset(new __sanitizer_sandbox_arguments);
    *sanitizer_args_ = { 0 };
#endif
}

// Enforces that a sandbox which went through pre-initialization also went
// through InitializeSandbox() before being destroyed.
LinuxSandbox::~LinuxSandbox()
{
    if (!pre_initialized_)
        return;

    CHECK(initialize_sandbox_ran_);
}

// Returns the LinuxSandbox instance obtained from base::Singleton. The
// returned pointer is CHECKed to be non-null.
LinuxSandbox* LinuxSandbox::GetInstance()
{
    LinuxSandbox* instance = base::Singleton<LinuxSandbox>::get();
    CHECK(instance);
    return instance;
}

void LinuxSandbox::PreinitializeSandbox()
{
    CHECK(!pre_initialized_);
    seccomp_bpf_supported_ = false;
#if defined(ANY_OF_AMTLU_SANITIZER)
    // Sanitizers need to open some resources before the sandbox is enabled.
    // This should not fork, not launch threads, not open a directory.
    __sanitizer_sandbox_on_notify(sanitizer_args());
    sanitizer_args_.reset();
#endif

    // Open proc_fd_. It would break the security of the setuid sandbox if it was
    // not closed.
    // If LinuxSandbox::PreinitializeSandbox() runs, InitializeSandbox() must run
    // as well.
    proc_fd_ = HANDLE_EINTR(open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
    CHECK_GE(proc_fd_, 0);
    // We "pre-warm" the code that detects supports for seccomp BPF.
    if (SandboxSeccompBPF::IsSeccompBPFDesired()) {
        if (!SandboxSeccompBPF::SupportsSandbox()) {
            VLOG(1) << "Lacking support for seccomp-bpf sandbox.";
        } else {
            seccomp_bpf_supported_ = true;
        }

        if (SandboxSeccompBPF::SupportsSandboxWithTsync()) {
            seccomp_bpf_with_tsync_supported_ = true;
        }
    }

    // Yama is a "global", system-level status. We assume it will not regress
    // after startup.
    const int yama_status = Yama::GetStatus();
    yama_is_enforcing_ = (yama_status & Yama::STATUS_PRESENT) && (yama_status & Yama::STATUS_ENFORCING);
    pre_initialized_ = true;
}

void LinuxSandbox::EngageNamespaceSandbox()
{
    CHECK(pre_initialized_);
    // Check being in a new PID namespace created by the namespace sandbox and
    // being the init process.
    CHECK(sandbox::NamespaceSandbox::InNewPidNamespace());
    const pid_t pid = getpid();
    CHECK_EQ(1, pid);

    CHECK(sandbox::Credentials::MoveToNewUserNS());
    // Note: this requires SealSandbox() to be called later in this process to be
    // safe, as this class is keeping a file descriptor to /proc/.
    CHECK(sandbox::Credentials::DropFileSystemAccess(proc_fd_));

    // We do not drop CAP_SYS_ADMIN because we need it to place each child process
    // in its own PID namespace later on.
    std::vector<sandbox::Credentials::Capability> caps;
    caps.push_back(sandbox::Credentials::Capability::SYS_ADMIN);
    CHECK(sandbox::Credentials::SetCapabilities(proc_fd_, caps));
}

// Returns the descriptors held by this object: proc_fd_ when it is open
// (non-negative), or an empty list otherwise.
std::vector<int> LinuxSandbox::GetFileDescriptorsToClose()
{
    if (proc_fd_ < 0) {
        return std::vector<int>();
    }
    return std::vector<int>(1, proc_fd_);
}

// Static entry point: runs InitializeSandboxImpl() on the singleton instance
// and returns its result.
bool LinuxSandbox::InitializeSandbox()
{
    return LinuxSandbox::GetInstance()->InitializeSandboxImpl();
}

// Static entry point: forwards |thread| to StopThreadImpl() on the singleton
// instance.
void LinuxSandbox::StopThread(base::Thread* thread)
{
    LinuxSandbox::GetInstance()->StopThreadImpl(thread);
}

// Returns the bitmask of kSandboxLinux* flags describing the sandbox layers
// in effect. Yields 0 until the sandbox has been pre-initialized; after that
// the mask is computed on the first call and cached in sandbox_status_flags_.
int LinuxSandbox::GetStatus()
{
    if (!pre_initialized_) {
        return 0;
    }

    // An earlier call already computed the mask.
    if (kSandboxLinuxInvalid != sandbox_status_flags_) {
        return sandbox_status_flags_;
    }

    int flags = 0;

    // Either the setuid sandbox or the user-namespace sandbox contributes the
    // layer-1 flags, never both.
    if (setuid_sandbox_client_->IsSandboxed()) {
        flags |= kSandboxLinuxSUID;
        if (setuid_sandbox_client_->IsInNewPIDNamespace()) {
            flags |= kSandboxLinuxPIDNS;
        }
        if (setuid_sandbox_client_->IsInNewNETNamespace()) {
            flags |= kSandboxLinuxNetNS;
        }
    } else if (sandbox::NamespaceSandbox::InNewUserNamespace()) {
        flags |= kSandboxLinuxUserNS;
        if (sandbox::NamespaceSandbox::InNewPidNamespace()) {
            flags |= kSandboxLinuxPIDNS;
        }
        if (sandbox::NamespaceSandbox::InNewNetNamespace()) {
            flags |= kSandboxLinuxNetNS;
        }
    }

    // We report whether the sandbox will be activated when renderers, workers
    // and PPAPI plugins go through sandbox initialization.
    if (seccomp_bpf_supported() && SandboxSeccompBPF::ShouldEnableSeccompBPF(switches::kRendererProcess)) {
        flags |= kSandboxLinuxSeccompBPF;
    }

    if (seccomp_bpf_with_tsync_supported() && SandboxSeccompBPF::ShouldEnableSeccompBPF(switches::kRendererProcess)) {
        flags |= kSandboxLinuxSeccompTSYNC;
    }

    if (yama_is_enforcing_) {
        flags |= kSandboxLinuxYama;
    }

    sandbox_status_flags_ = flags;
    return sandbox_status_flags_;
}

// Reports whether the process currently has a single thread. Threads are
// counted via /proc/self/task; because of PID namespaces and existing
// sandboxes, "self" must really be used rather than the pid. Being unable to
// get a /proc descriptor is fatal.
bool LinuxSandbox::IsSingleThreaded() const
{
    base::ScopedFD proc_fd(OpenProc(proc_fd_));

    CHECK(proc_fd.is_valid()) << "Could not count threads, the sandbox was not "
                              << "pre-initialized properly.";

    return sandbox::ThreadHelpers::IsSingleThreaded(proc_fd.get());
}

// Accessor for seccomp_bpf_started_, which StartSeccompBPF() sets to the
// result of starting the seccomp-bpf sandbox.
bool LinuxSandbox::seccomp_bpf_started() const
{
    return seccomp_bpf_started_;
}

// Returns a non-owning pointer to the setuid sandbox client created in the
// constructor; ownership stays with this object.
sandbox::SetuidSandboxClient*
LinuxSandbox::setuid_sandbox_client() const
{
    return setuid_sandbox_client_.get();
}

// Tries to start the seccomp-bpf sandbox for |process_type| through the
// SandboxSeccompBPF class and returns whether it started. May only be called
// once, after pre-initialization. Nothing is attempted when seccomp-bpf
// support was not detected.
bool LinuxSandbox::StartSeccompBPF(const std::string& process_type)
{
    CHECK(!seccomp_bpf_started_);
    CHECK(pre_initialized_);

    if (!seccomp_bpf_supported()) {
        return false;
    }

    seccomp_bpf_started_ = SandboxSeccompBPF::StartSandbox(process_type, OpenProc(proc_fd_));
    if (!seccomp_bpf_started_) {
        return false;
    }

    LogSandboxStarted("seccomp-bpf");
    return true;
}

// Performs the actual sandbox initialization for the current process and
// returns whether the seccomp-bpf sandbox was started.
//
// Returns false without starting anything when the process is already
// multi-threaded (silently under TSAN, after logging otherwise; that log is
// fatal except for the GPU process with fatal failures not requested).
// On the single-threaded path it pre-initializes if needed, limits the
// address space and then tries to start seccomp-bpf.
bool LinuxSandbox::InitializeSandboxImpl()
{
    // Expected to run at most once per process (verified in DCHECK builds).
    DCHECK(!initialize_sandbox_ran_);
    initialize_sandbox_ran_ = true;

    base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
    const std::string process_type = command_line->GetSwitchValueASCII(switches::kProcessType);

    // We need to make absolutely sure that our sandbox is "sealed" before
    // returning.
    // Unretained() since the current object is a Singleton.
    base::ScopedClosureRunner sandbox_sealer(
        base::Bind(&LinuxSandbox::SealSandbox, base::Unretained(this)));
    // Make sure that this function enables sandboxes as promised by GetStatus().
    // Unretained() since the current object is a Singleton.
    // NOTE(review): declared after |sandbox_sealer|, so it is destroyed first;
    // the promise check is therefore expected to run before the sandbox is
    // sealed - keep this declaration order.
    base::ScopedClosureRunner sandbox_promise_keeper(
        base::Bind(&LinuxSandbox::CheckForBrokenPromises,
            base::Unretained(this),
            process_type));

    // No matter what, it's always an error to call InitializeSandbox() after
    // threads have been created.
    if (!IsSingleThreaded()) {
        std::string error_message = "InitializeSandbox() called with multiple "
                                    "threads in process "
            + process_type + ". ";
        // TSAN starts a helper thread, so we don't start the sandbox and don't
        // even report an error about it.
        if (IsRunningTSAN())
            return false;

#if defined(OS_CHROMEOS)
        if (base::SysInfo::IsRunningOnChromeOS() && process_type == switches::kGpuProcess) {
            error_message += "This error can be safely ignored in VMTests.";
        }
#endif

        // The GPU process is allowed to call InitializeSandbox() with threads.
        bool sandbox_failure_fatal = process_type != switches::kGpuProcess;
        // This can be disabled with the '--gpu-sandbox-failures-fatal' flag.
        // Setting the flag with no value or any value different than 'yes' or 'no'
        // is equal to setting '--gpu-sandbox-failures-fatal=yes'.
        if (process_type == switches::kGpuProcess && command_line->HasSwitch(switches::kGpuSandboxFailuresFatal)) {
            const std::string switch_value = command_line->GetSwitchValueASCII(switches::kGpuSandboxFailuresFatal);
            // Only the literal value "no" makes the failure non-fatal.
            sandbox_failure_fatal = switch_value != "no";
        }

        if (sandbox_failure_fatal)
            LOG(FATAL) << error_message;

        // Non-fatal case: report the problem and leave the process unsandboxed.
        LOG(ERROR) << error_message;
        return false;
    }

    // Only one thread is running, pre-initialize if not already done.
    if (!pre_initialized_)
        PreinitializeSandbox();

    DCHECK(!HasOpenDirectories()) << "InitializeSandbox() called after unexpected directories have been "
                                  << "opened. This breaks the security of the setuid sandbox.";

    // Attempt to limit the future size of the address space of the process.
    // The result is intentionally not checked.
    LimitAddressSpace(process_type);

    // Try to enable seccomp-bpf.
    bool seccomp_bpf_started = StartSeccompBPF(process_type);

    return seccomp_bpf_started;
}

// Instance-side implementation of StopThread(): forwards the non-null
// |thread| to StopThreadAndEnsureNotCounted().
void LinuxSandbox::StopThreadImpl(base::Thread* thread)
{
    DCHECK(thread);
    StopThreadAndEnsureNotCounted(thread);
}

// Returns whether seccomp-bpf support was detected by PreinitializeSandbox().
// Calling this before pre-initialization trips the CHECK.
bool LinuxSandbox::seccomp_bpf_supported() const
{
    CHECK(pre_initialized_);
    return seccomp_bpf_supported_;
}

// Returns whether seccomp-bpf with TSYNC support was detected by
// PreinitializeSandbox(). Calling this before pre-initialization trips the
// CHECK.
bool LinuxSandbox::seccomp_bpf_with_tsync_supported() const
{
    CHECK(pre_initialized_);
    return seccomp_bpf_with_tsync_supported_;
}

// Tries to lower RLIMIT_AS and RLIMIT_DATA for the current process.
// Returns true only when both limits were lowered. Returns false when
// --no-sandbox is present, and always in sanitizer builds, where no limit is
// applied at all.
bool LinuxSandbox::LimitAddressSpace(const std::string& process_type)
{
    // |process_type| is only read on LP64, non-sanitizer builds.
    (void)process_type;
#if !defined(ANY_OF_AMTLU_SANITIZER)
    if (base::CommandLine::ForCurrentProcess()->HasSwitch(switches::kNoSandbox)) {
        return false;
    }

    // Limit the address space to 4GB.
    // This is in the hope of making some kernel exploits more complex and less
    // reliable. It also limits sprays a little on 64-bit.
    rlim_t address_space_limit = std::numeric_limits<uint32_t>::max();
#if defined(__LP64__)
    // On 64 bits, V8 and possibly others will reserve massive memory ranges and
    // rely on on-demand paging for allocation. Unfortunately, even
    // MADV_DONTNEED ranges count towards RLIMIT_AS so this is not an option.
    // See crbug.com/169327 for a discussion.
    // On the GPU process, irrespective of V8, we can exhaust a 4GB address space
    // under normal usage, see crbug.com/271119
    // For now, increase the limit to 16GB for renderer and GPU processes to
    // accommodate.
    const bool wants_larger_address_space = process_type == switches::kRendererProcess || process_type == switches::kGpuProcess;
    if (wants_larger_address_space) {
        address_space_limit = 1L << 34;
    }
#endif // defined(__LP64__)

    // On all platforms, add a limit to the brk() heap that would prevent
    // allocations that can't be indexed by an int.
    const rlim_t kNewDataSegmentMaxSize = std::numeric_limits<int>::max();

    // Both limits are always attempted, even if the first one fails.
    const bool address_space_lowered = sandbox::ResourceLimits::Lower(RLIMIT_AS, address_space_limit);
    const bool data_segment_lowered = sandbox::ResourceLimits::Lower(RLIMIT_DATA, kNewDataSegmentMaxSize);

    // Cache the resource limit before turning on the sandbox.
    base::SysInfo::AmountOfVirtualMemory();

    return address_space_lowered && data_segment_lowered;
#else
    base::SysInfo::AmountOfVirtualMemory();
    return false;
#endif // !defined(ANY_OF_AMTLU_SANITIZER)
}

// Forwards to sandbox::ProcUtil::HasOpenDirectory() with this object's /proc
// handle and returns its answer.
bool LinuxSandbox::HasOpenDirectories() const
{
    return sandbox::ProcUtil::HasOpenDirectory(proc_fd_);
}

// Closes the /proc handle held in proc_fd_, if any, and resets it to -1.
// A close() result other than 0 is fatal. Calling this with no open handle
// is a no-op.
void LinuxSandbox::SealSandbox()
{
    if (proc_fd_ < 0) {
        return;
    }

    int ret = IGNORE_EINTR(close(proc_fd_));
    CHECK_EQ(0, ret);
    proc_fd_ = -1;
}

// Verifies that any promise made with GetStatus() wasn't broken: for renderer
// and PPAPI plugin processes, if a status was computed and it advertised
// seccomp-bpf, the seccomp-bpf sandbox must actually have been started.
void LinuxSandbox::CheckForBrokenPromises(const std::string& process_type)
{
    const bool process_type_has_promise = process_type == switches::kRendererProcess || process_type == switches::kPpapiPluginProcess;
    if (!process_type_has_promise) {
        return;
    }

    // GetStatus() was never asked, so nothing was promised.
    if (sandbox_status_flags_ == kSandboxLinuxInvalid) {
        return;
    }

    if (!(GetStatus() & kSandboxLinuxSeccompBPF)) {
        return;
    }

    CHECK(seccomp_bpf_started_);
}

// Stops |thread| via sandbox::ThreadHelpers::StopThreadAndWatchProcFS(),
// handing it a /proc descriptor obtained from OpenProc(). Failing to get that
// descriptor, or a false result from the helper, is fatal.
void LinuxSandbox::StopThreadAndEnsureNotCounted(base::Thread* thread) const
{
    DCHECK(thread);
    // Temporary descriptor, released when |proc_fd| goes out of scope.
    base::ScopedFD proc_fd(OpenProc(proc_fd_));
    PCHECK(proc_fd.is_valid());
    CHECK(
        sandbox::ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.get(), thread));
}

} // namespace content
